From f5f1e2a8d2bd951dbd4ee813f1a2744632b65332 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Tue, 30 Nov 2004 17:17:07 +0000 Subject: [PATCH] bitkeeper revision 1.1159.187.42 (41acab13co1dnyoD3jJfv5m4ox_FFg) Major balloon driver cleanups. This has required some moderately large changes to start-of-day code in Linux 2.4 and 2.6. --- .../arch/xen/kernel/setup.c | 20 +- linux-2.4.28-xen-sparse/arch/xen/mm/init.c | 49 +-- .../arch/xen/i386/kernel/setup.c | 34 +- .../arch/xen/i386/mm/init.c | 84 ++--- .../drivers/xen/balloon/balloon.c | 350 ++++-------------- .../include/asm-xen/hypervisor.h | 1 - 6 files changed, 170 insertions(+), 368 deletions(-) diff --git a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c index 72454469d0..3b27174ba6 100644 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c @@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p) * arch/xen/drivers/balloon/balloon.c */ mem_param = parse_mem_cmdline(cmdline_p); - if (!mem_param) mem_param = xen_start_info.nr_pages; + if (mem_param < xen_start_info.nr_pages) + mem_param = xen_start_info.nr_pages; #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) @@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p) printk(KERN_WARNING "Use a PAE enabled kernel.\n"); else printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + max_pfn = lmax_low_pfn; #else /* !CONFIG_HIGHMEM */ #ifndef CONFIG_X86_PAE if (max_pfn > MAX_NONPAE_PFN) { @@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p) */ max_low_pfn = lmax_low_pfn; - - #ifdef CONFIG_BLK_DEV_INITRD if ( xen_start_info.mod_start != 0 ) { @@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p) paging_init(); + /* Make sure we have a large enough P->M table. 
*/ + if ( max_pfn > xen_start_info.nr_pages ) + { + phys_to_machine_mapping = alloc_bootmem_low_pages( + max_pfn * sizeof(unsigned long)); + memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long)); + memcpy(phys_to_machine_mapping, + (unsigned long *)xen_start_info.mfn_list, + xen_start_info.nr_pages * sizeof(unsigned long)); + free_bootmem(__pa(xen_start_info.mfn_list), + PFN_PHYS(PFN_UP(xen_start_info.nr_pages * + sizeof(unsigned long)))); + } + pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) { diff --git a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c index 6a694022bf..dd622aebda 100644 --- a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c +++ b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c @@ -213,23 +213,16 @@ static void __init fixrange_init (unsigned long start, static void __init pagetable_init (void) { - unsigned long vaddr, end; + unsigned long vaddr, end, ram_end; pgd_t *kpgd, *pgd, *pgd_base; int i, j, k; pmd_t *kpmd, *pmd; pte_t *kpte, *pte, *pte_base; - /* create tables only for boot_pfn frames. max_low_pfn may be sized for - * pages yet to be allocated from the hypervisor, or it may be set - * to override the xen_start_info amount of memory - */ - int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn); - - /* - * This can be zero as well - no problem, in that case we exit - * the loops anyway due to the PTRS_PER_* conditions. 
- */ - end = (unsigned long)__va(boot_pfn *PAGE_SIZE); + end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); + ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE); + if ( ram_end > end ) + ram_end = end; pgd_base = init_mm.pgd; i = __pgd_offset(PAGE_OFFSET); @@ -237,12 +230,12 @@ static void __init pagetable_init (void) for (; i < PTRS_PER_PGD; pgd++, i++) { vaddr = i*PGDIR_SIZE; - if (end && (vaddr >= end)) + if (vaddr >= end) break; pmd = (pmd_t *)pgd; for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { vaddr = i*PGDIR_SIZE + j*PMD_SIZE; - if (end && (vaddr >= end)) + if (vaddr >= end) break; /* Filled in for us already? */ @@ -250,10 +243,11 @@ static void __init pagetable_init (void) continue; pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + clear_page(pte_base); for (k = 0; k < PTRS_PER_PTE; pte++, k++) { vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; - if (end && (vaddr >= end)) + if (vaddr >= ram_end) break; *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); } @@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned long pagenr) return 1; } -static inline int page_kills_ppro(unsigned long pagenr) -{ - return 0; -} - #ifdef CONFIG_HIGHMEM -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +void __init one_highpage_init(struct page *page, int free_page) { - if (!page_is_ram(pfn)) { - SetPageReserved(page); - return; - } - - if (bad_ppro && page_kills_ppro(pfn)) { - SetPageReserved(page); - return; - } - ClearPageReserved(page); set_bit(PG_highmem, &page->flags); atomic_set(&page->count, 1); - __free_page(page); + if ( free_page ) + __free_page(page); totalhigh_pages++; } #endif /* CONFIG_HIGHMEM */ @@ -392,8 +372,9 @@ static int __init free_pages_init(void) reservedpages++; } #ifdef CONFIG_HIGHMEM - for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--) - one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); + for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) + 
one_highpage_init((struct page *) (mem_map + pfn), pfn, + (pfn < xen_start_info.nr_pages)); totalram_pages += totalhigh_pages; #endif return reservedpages; diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c index 8eb0add9d6..049e4aa0f6 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c @@ -52,6 +52,9 @@ #include "setup_arch_pre.h" #include <bios_ebda.h> +/* Allows setting of maximum possible memory size */ +static unsigned long xen_override_max_pfn; + int disable_pse __initdata = 0; /* @@ -718,8 +721,13 @@ static void __init parse_cmdline_early (char ** cmdline_p) unsigned long long mem_size; mem_size = memparse(from+4, &from); +#if 0 limit_regions(mem_size); userdef=1; +#else + xen_override_max_pfn = + (unsigned long)(mem_size>>PAGE_SHIFT); +#endif } } @@ -857,6 +865,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } } +#if 0 /* !XEN */ /* * Callback for efi_memory_walk. */ @@ -873,7 +882,6 @@ efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) return 0; } - /* * Find the highest page frame number we have available */ @@ -900,6 +908,15 @@ void __init find_max_pfn(void) max_pfn = end; } } +#else +/* We don't use the fake e820 because we need to respond to user override. */ +void __init find_max_pfn(void) +{ + if ( xen_override_max_pfn < xen_start_info.nr_pages ) + xen_override_max_pfn = xen_start_info.nr_pages; + max_pfn = xen_override_max_pfn; +} +#endif /* XEN */ /* * Determine low and high memory ranges: @@ -1414,6 +1431,21 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); + /* Make sure we have a large enough P->M table. 
*/ + if (max_pfn > xen_start_info.nr_pages) { + phys_to_machine_mapping = alloc_bootmem_low_pages( + max_pfn * sizeof(unsigned long)); + memset(phys_to_machine_mapping, ~0, + max_pfn * sizeof(unsigned long)); + memcpy(phys_to_machine_mapping, + (unsigned long *)xen_start_info.mfn_list, + xen_start_info.nr_pages * sizeof(unsigned long)); + free_bootmem( + __pa(xen_start_info.mfn_list), + PFN_PHYS(PFN_UP(xen_start_info.nr_pages * + sizeof(unsigned long)))); + } + pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) { diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c index fbf48798d3..e2408ea3f7 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c @@ -77,6 +77,12 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) { if (pmd_none(*pmd)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + /* XEN: Make the new p.t. read-only. 
*/ + pgd_t *kpgd = pgd_offset_k((unsigned long)page_table); + pmd_t *kpmd = pmd_offset(kpgd, (unsigned long)page_table); + pte_t *kpte = pte_offset_kernel(kpmd, (unsigned long)page_table); + xen_l1_entry_update( + kpte, (*(unsigned long *)kpte)&~_PAGE_RW); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); if (page_table != pte_offset_kernel(pmd, 0)) BUG(); @@ -141,25 +147,6 @@ void __init protect_page(pgd_t *pgd, void *page, int mode) pte_val_ma(*pte) | _PAGE_RW); } -void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode) -{ - pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx; - - protect_page(dpgd, spgd, mode); - - for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) { - pmd = pmd_offset(spgd, 0); - if (pmd_none(*pmd)) - continue; - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { - pte = pte_offset_kernel(pmd, 0); - protect_page(dpgd, pte, mode); - } - } -} - static inline int is_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end) @@ -180,6 +167,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pte_t *pte; int pgd_idx, pmd_idx, pte_ofs; + unsigned long max_ram_pfn = xen_start_info.nr_pages; + if (max_ram_pfn > max_low_pfn) + max_ram_pfn = max_low_pfn; + pgd_idx = pgd_index(PAGE_OFFSET); pgd = pgd_base + pgd_idx; pfn = 0; @@ -207,7 +198,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) pte = one_page_table_init(pmd); pte += pte_ofs; - for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { + /* XEN: Only map initial RAM allocation. 
*/ + for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) { + if (pte_present(*pte)) + continue; if (is_kernel_text(address)) set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); else @@ -311,7 +305,8 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) ClearPageReserved(page); set_bit(PG_highmem, &page->flags); set_page_count(page, 1); - __free_page(page); + if (pfn < xen_start_info.nr_pages) + __free_page(page); totalhigh_pages++; } else SetPageReserved(page); @@ -347,7 +342,8 @@ extern void __init remap_numa_kva(void); static void __init pagetable_init (void) { unsigned long vaddr; - pgd_t *pgd_base = swapper_pg_dir; + pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base; + pgd_t *new_pgd = swapper_pg_dir; #ifdef CONFIG_X86_PAE int i; @@ -368,7 +364,22 @@ static void __init pagetable_init (void) __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; } - kernel_physical_mapping_init(pgd_base); + /* + * Switch to proper mm_init page directory. Initialise from the current + * page directory, write-protect the new page directory, then switch to + * it. We clean up by write-enabling and then freeing the old page dir. 
+ */ + memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t)); + protect_page(new_pgd, new_pgd, PROT_ON); + queue_pgd_pin(__pa(new_pgd)); + load_cr3(new_pgd); + queue_pgd_unpin(__pa(old_pgd)); + __flush_tlb_all(); /* implicit flush */ + protect_page(new_pgd, old_pgd, PROT_OFF); + flush_page_update_queue(); + free_bootmem(__pa(old_pgd), PAGE_SIZE); + + kernel_physical_mapping_init(new_pgd); remap_numa_kva(); /* @@ -376,9 +387,9 @@ static void __init pagetable_init (void) * created - mappings will be set by set_fixmap(): */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - page_table_range_init(vaddr, 0, pgd_base); + page_table_range_init(vaddr, 0, new_pgd); - permanent_kmaps_init(pgd_base); + permanent_kmaps_init(new_pgd); #ifdef CONFIG_X86_PAE /* @@ -388,7 +399,7 @@ static void __init pagetable_init (void) * All user-space mappings are explicitly cleared after * SMP startup. */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; + new_pgd[0] = new_pgd[USER_PTRS_PER_PGD]; #endif } @@ -545,8 +556,6 @@ out: */ void __init paging_init(void) { - pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base; - pgd_t *new_pgd = swapper_pg_dir; #ifdef CONFIG_XEN_PHYSDEV_ACCESS int i; #endif @@ -559,25 +568,6 @@ void __init paging_init(void) pagetable_init(); - /* - * Write-protect both page tables within both page tables. - * That's three ops, as the old p.t. is already protected - * within the old p.t. Then pin the new table, switch tables, - * and unprotect the old table. - */ - protect_pagetable(new_pgd, old_pgd, PROT_ON); - protect_pagetable(new_pgd, new_pgd, PROT_ON); - protect_pagetable(old_pgd, new_pgd, PROT_ON); - queue_pgd_pin(__pa(new_pgd)); - load_cr3(new_pgd); - queue_pgd_unpin(__pa(old_pgd)); - __flush_tlb_all(); /* implicit flush */ - protect_pagetable(new_pgd, old_pgd, PROT_OFF); - flush_page_update_queue(); - - /* Completely detached from old tables, so free them. 
*/ - free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT); - #ifdef CONFIG_X86_PAE /* * We will bail out later - printk doesn't work right now so diff --git a/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c index 566a9578dd..94658f1e54 100644 --- a/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c +++ b/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c @@ -4,6 +4,7 @@ * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic + * Copyright (c) 2003-2004, M Williamson, K Fraser * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -48,19 +49,10 @@ #include #include -/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */ -#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */ -#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */ -typedef struct user_balloon_op { - unsigned int op; - unsigned long size; -} user_balloon_op_t; -/* END OF USER DEFINE */ - static struct proc_dir_entry *balloon_pde; unsigned long credit; -static unsigned long current_pages, most_seen_pages; +static unsigned long current_pages; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) /* Use the private and mapping fields of struct page as a list. */ @@ -78,71 +70,66 @@ static unsigned long current_pages, most_seen_pages; #define pte_offset_kernel pte_offset #endif +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_mem: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "xen_mem: " fmt, ##args) + /* List of ballooned pages, threaded through the mem_map array. */ LIST_HEAD(ballooned_pages); -/** add_ballooned_page - remember we've ballooned a pfn */ -void add_ballooned_page(unsigned long pfn) +/* balloon_append: add the given page to the balloon. 
*/ +void balloon_append(struct page *page) { - struct page *p = mem_map + pfn; - - list_add(PAGE_TO_LIST(p), &ballooned_pages); + list_add(PAGE_TO_LIST(page), &ballooned_pages); } -/* rem_ballooned_page - recall a ballooned page and remove from list. */ -struct page *rem_ballooned_page(void) +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ +struct page *balloon_retrieve(void) { - if(!list_empty(&ballooned_pages)) - { - struct page *ret; - - ret = LIST_TO_PAGE(ballooned_pages.next); - UNLIST_PAGE(ret); + struct page *page; - return ret; - } - else + if ( list_empty(&ballooned_pages) ) return NULL; + + page = LIST_TO_PAGE(ballooned_pages.next); + UNLIST_PAGE(page); + return page; } static inline pte_t *get_ptep(unsigned long addr) { - pgd_t *pgd; pmd_t *pmd; pte_t *ptep; - pgd = pgd_offset_k(addr); + pgd_t *pgd; + pmd_t *pmd; + pgd = pgd_offset_k(addr); if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); pmd = pmd_offset(pgd, addr); if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); - ptep = pte_offset_kernel(pmd, addr); - - return ptep; + return pte_offset_kernel(pmd, addr); } /* Main function for relinquishing memory. */ static unsigned long inflate_balloon(unsigned long num_pages) - { - unsigned long *parray; - unsigned long *currp; - unsigned long curraddr; - unsigned long ret = 0; - unsigned long i, j; + unsigned long *parray, *currp, curraddr, ret = 0, i, j, mfn, pfn; + struct page *page; parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); if ( parray == NULL ) { - printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n"); - return -EFAULT; + WPRINTK("inflate_balloon: Unable to vmalloc parray\n"); + return -ENOMEM; } currp = parray; for ( i = 0; i < num_pages; i++, currp++ ) { - struct page *page = alloc_page(GFP_HIGHUSER); - unsigned long pfn = page - mem_map; + page = alloc_page(GFP_HIGHUSER); + pfn = page - mem_map; /* If allocation fails then free all reserved pages. 
*/ if ( page == NULL ) @@ -160,10 +147,9 @@ static unsigned long inflate_balloon(unsigned long num_pages) *currp = pfn; } - for ( i = 0, currp = parray; i < num_pages; i++, currp++ ) { - unsigned long mfn = phys_to_machine_mapping[*currp]; + mfn = phys_to_machine_mapping[*currp]; curraddr = (unsigned long)page_address(mem_map + *currp); /* Blow away page contents for security, and also p.t. ref if any. */ if ( curraddr != 0 ) @@ -180,7 +166,7 @@ static unsigned long inflate_balloon(unsigned long num_pages) } #endif - add_ballooned_page(*currp); + balloon_append(&mem_map[*currp]); phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY; *currp = mfn; @@ -206,62 +192,45 @@ static unsigned long inflate_balloon(unsigned long num_pages) return ret; } -/* - * Install new mem pages obtained by deflate_balloon. function walks - * phys->machine mapping table looking for DEAD entries and populates - * them. - */ -static unsigned long process_returned_pages(unsigned long * parray, - unsigned long num) +/* Install a set of new pages (@mfn_list, @nr_mfns) into the memory map. */ +static unsigned long process_returned_pages( + unsigned long *mfn_list, unsigned long nr_mfns) { - /* currently, this function is rather simplistic as - * it is assumed that domain reclaims only number of - * pages previously released. this is to change soon - * and the code to extend page tables etc. will be - * incorporated here. 
- */ - - unsigned long * curr = parray; - unsigned long num_installed; - + unsigned long pfn, i; struct page *page; - num_installed = 0; - while ( (page = rem_ballooned_page()) != NULL ) + for ( i = 0; i < nr_mfns; i++ ) { - unsigned long pfn; - - if ( num_installed == num ) + if ( (page = balloon_retrieve()) != NULL ) break; pfn = page - mem_map; - - if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) - { - printk("BUG: Tried to unballoon existing page!"); + if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY ) BUG(); - } - phys_to_machine_mapping[pfn] = *curr; - queue_machphys_update(*curr, pfn); - if (pfn<max_low_pfn) + + /* Update P->M and M->P tables. */ + phys_to_machine_mapping[pfn] = mfn_list[i]; + queue_machphys_update(mfn_list[i], pfn); + + /* Link back into the page tables if it's not a highmem page. */ + if ( pfn < max_low_pfn ) queue_l1_entry_update( get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)), - ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); - - __free_page(mem_map + pfn); + (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); - curr++; - num_installed++; + /* Finally, relinquish the memory back to the system allocator. */ + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); } - return num_installed; + return i; } unsigned long deflate_balloon(unsigned long num_pages) { unsigned long ret; - unsigned long * parray; + unsigned long *parray; if ( num_pages > credit ) { @@ -305,205 +274,25 @@ unsigned long deflate_balloon(unsigned long num_pages) #define PAGE_TO_MB_SHIFT 8 -/* - * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c - * The loops do go through all of low memory (ZONE_NORMAL). The - * old pages have _PAGE_PRESENT set and so get skipped. - * If low memory is not full, the new pages are used to fill it, going - * from cur_low_pfn to low_pfn. high memory is not direct mapped so - * no extension is needed for new high memory. 
- */ - -static void pagetable_extend (int cur_low_pfn, int newpages) -{ - unsigned long vaddr, end; - pgd_t *kpgd, *pgd, *pgd_base; - int i, j, k; - pmd_t *kpmd, *pmd; - pte_t *kpte, *pte, *pte_base; - int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn); - - /* - * This can be zero as well - no problem, in that case we exit - * the loops anyway due to the PTRS_PER_* conditions. - */ - end = (unsigned long)__va(low_pfn*PAGE_SIZE); - - pgd_base = init_mm.pgd; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - i = pgd_index(PAGE_OFFSET); -#else - i = __pgd_offset(PAGE_OFFSET); -#endif - pgd = pgd_base + i; - - for (; i < PTRS_PER_PGD; pgd++, i++) { - vaddr = i*PGDIR_SIZE; - if (end && (vaddr >= end)) - break; - pmd = (pmd_t *)pgd; - for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { - vaddr = i*PGDIR_SIZE + j*PMD_SIZE; - if (end && (vaddr >= end)) - break; - - /* Filled in for us already? */ - if ( pmd_val(*pmd) & _PAGE_PRESENT ) - continue; - - pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL); - - for (k = 0; k < PTRS_PER_PTE; pte++, k++) { - vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; - if (end && (vaddr >= end)) - break; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL); -#else - *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); -#endif - } - kpgd = pgd_offset_k((unsigned long)pte_base); - kpmd = pmd_offset(kpgd, (unsigned long)pte_base); - kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base); - queue_l1_entry_update(kpte, - (*(unsigned long *)kpte)&~_PAGE_RW); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); - XEN_flush_page_update_queue(); - } - } -} - -/* - * claim_new_pages() asks xen to increase this domain's memory reservation - * and return a list of the new pages of memory. This new pages are - * added to the free list of the memory manager. - * - * Available RAM does not normally change while Linux runs. 
To make this work, - * the linux mem= boottime command line param must say how big memory could - * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c - * sets max_pfn, max_low_pfn and the zones according to - * this max memory size. The page tables themselves can only be - * extended after xen has assigned new pages to this domain. - */ - -static unsigned long -claim_new_pages(unsigned long num_pages) -{ - unsigned long new_page_cnt, pfn; - unsigned long * parray, *curr; - - if (most_seen_pages+num_pages> max_pfn) - num_pages = max_pfn-most_seen_pages; - if (num_pages==0) return -EINVAL; - - parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); - if ( parray == NULL ) - { - printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n"); - return 0; - } - - new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, - parray, num_pages, 0); - if ( new_page_cnt != num_pages ) - { - printk(KERN_WARNING - "claim_new_pages: xen granted only %lu of %lu requested pages\n", - new_page_cnt, num_pages); - - /* - * Avoid xen lockup when user forgot to setdomainmaxmem. Xen - * usually can dribble out a few pages and then hangs. 
- */ - if ( new_page_cnt < 1000 ) - { - printk(KERN_WARNING "Remember to use setdomainmaxmem\n"); - HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, - parray, new_page_cnt, 0); - return -EFAULT; - } - } - memcpy(phys_to_machine_mapping+most_seen_pages, parray, - new_page_cnt * sizeof(unsigned long)); - - pagetable_extend(most_seen_pages,new_page_cnt); - - for ( pfn = most_seen_pages, curr = parray; - pfn < most_seen_pages+new_page_cnt; - pfn++, curr++ ) - { - struct page *page = mem_map + pfn; - -#ifndef CONFIG_HIGHMEM - if ( pfn>=max_low_pfn ) - { - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - pfn>>PAGE_TO_MB_SHIFT); - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); - break; - } -#endif - queue_machphys_update(*curr, pfn); - if ( pfn < max_low_pfn ) - queue_l1_entry_update( - get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)), - ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); - - XEN_flush_page_update_queue(); - - /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */ - ClearPageReserved(page); - if ( pfn >= max_low_pfn ) - set_bit(PG_highmem, &page->flags); - set_page_count(page, 1); - __free_page(page); - } - - vfree(parray); - - return new_page_cnt; -} - - static int balloon_try_target(int target) { int change, reclaim; if ( target < current_pages ) { - int change = inflate_balloon(current_pages-target); - if ( change <= 0 ) + if ( (change = inflate_balloon(current_pages-target)) <= 0 ) return change; - current_pages -= change; printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n", change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); } - else if ( target > current_pages ) + else if ( (reclaim = target - current_pages) > 0 ) { - reclaim = min((unsigned long)target,most_seen_pages) - current_pages; - - if ( reclaim ) - { - change = deflate_balloon( reclaim ); - if ( change <= 0 ) - return change; - current_pages += change; - printk(KERN_INFO "Reclaim %dMB from xen. 
Domain now has %luMB\n", - change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); - } - - if ( most_seen_pages < target ) - { - int growth = claim_new_pages(target-most_seen_pages); - if ( growth <= 0 ) - return growth; - most_seen_pages += growth; - current_pages += growth; - printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n", - growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); - } + if ( (change = deflate_balloon(reclaim)) <= 0 ) + return change; + current_pages += change; + printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n", + change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); } return 1; @@ -640,12 +429,15 @@ static int balloon_read(char *page, char **start, off_t off, static int __init balloon_init(void) { - printk(KERN_ALERT "Starting Xen Balloon driver\n"); + unsigned long pfn; + struct page *page; + + IPRINTK("Initialising balloon driver.\n"); - most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn); + current_pages = min(xen_start_info.nr_pages, max_pfn); if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL ) { - printk(KERN_ALERT "Unable to create balloon driver proc entry!"); + WPRINTK("Unable to create balloon driver proc entry!"); return -1; } @@ -661,18 +453,12 @@ static int __init balloon_init(void) (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, CALLBACK_IN_BLOCKING_CONTEXT); - /* - * make_module a new phys map if mem= says xen can give us memory to grow - */ - if ( max_pfn > xen_start_info.nr_pages ) + /* Initialise the balloon with excess memory space. 
*/ + for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ ) { - extern unsigned long *phys_to_machine_mapping; - unsigned long *newmap; - newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long)); - memset(newmap, ~0, max_pfn * sizeof(unsigned long)); - memcpy(newmap, phys_to_machine_mapping, - xen_start_info.nr_pages * sizeof(unsigned long)); - phys_to_machine_mapping = newmap; + page = &mem_map[pfn]; + if ( !PageReserved(page) ) + balloon_append(page); } return 0; diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h index 235c4fc5a6..85f0f1162c 100644 --- a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h +++ b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h @@ -59,7 +59,6 @@ void do_hypervisor_callback(struct pt_regs *regs); #define PROT_ON 1 #define PROT_OFF 0 void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode); -void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode); /* arch/xen/i386/kernel/head.S */ void lgdt_finish(void); -- 2.30.2